* Session setup: start from an empty workspace and turn off output paging.
clear all
set more off

* Project folders (relative paths, resolved against the current working directory).
global DIR      "Dropbox\INE\data\bhh"
global IPDIR    "Dropbox\INE\data\matchip"
global INNODIR  "Dropbox\INE\data\Innovacion"

* Close any log left open by an earlier run, then open a fresh text log.
capture log close
log using "$DIR\extract", text replace

**********************************************************************************
*** This do-file extracts the estimation sample for Fink, Hall, and Helmers (2019),
*** EDCC revision. 
*** BHH August 2019
**********************************************************************************

* Load the input file and label the innovation-survey sample indicator.
use $DIR\enia_inno_ip, clear
label variable innosmpl "D (Innovation survey sample)"

* Give the source variables the names used in the rest of this file.
* Each rename below pairs the i-th old name with the i-th new name.

* Legal form, ownership, industry codes, establishment size
rename (orgjur   forpro    ciiu2 ciiu3 teed)     ///
       (legalorg ownership isic2 isic3 est_size)
* NOTE(review): old and new names are identical on the next line, so it changes
* nothing; confirm whether a different source name was intended.
rename region region

* Hours variables
rename (hrsdirec  hrsprop     hrsadmin    hrsespec)    ///
       (dir_hours owner_hours admin_hours skill_hours)

* Employment counts (suffix _m / _f)
rename (espprh      espprm      admprh      admprm      tothom totmuj) ///
       (emp_skill_m emp_skill_f emp_admin_m emp_admin_f emp_m  emp_f)

* Fixed-asset items
rename (salter  saledi   salmaq  salveh salhab)  ///
       (fa_land fa_build fa_mach fa_veh fa_have)

* Accounting items
rename (acthab     pasdeb    gashab     ingdeb)   ///
       (asset_acct liab_acct spend_acct rev_acct)

* Materials
rename (matpco   mateco    mtmpval)   ///
       (mat_fuel purch_mat materials)
* Disabled fallback for materials, kept for reference:
*replace materials = mat_fuel + mconval + msubval + mrevval if materials==. | materials==0

* Revenue, value added, exports
rename (ingtot ingtotal   va     ingexp)  ///
       (totrev totrev_own valadd exports)

* IP variables
rename (ip   ip_app   ip_knowhow ip_pat_ext ip_pat_own ipatval) ///
       (n_ip n_ip_reg n_knowhow  npat_ext   npat_own   ipsales)

* Application counts by type
rename (d_appln_c   db_appln_c  p_appln_c tm_appln_c u_appln_c) ///
       (napp_deseno napp_dibujo napp_pat  napp_tm    napp_um)

* Innovation / R&D spending
rename (rd_tot   rdexp)     ///
       (inno_tot rd_exptot)
label variable rd_exptot "Total R&D spending, created by CH"

* Row sums over the cb* and vbu* variables (rowtotal() counts missing values as zero).
egen cap_exp  = rowtotal(cb*)
egen cap_sale = rowtotal(vbu*)
label variable cap_exp  "Capital expenditure"
label variable cap_sale "Sale of used capital"

* Innovation indicators: move the ino_ prefix to an _inno suffix with shorter stems.
rename (ino_des     ino_orgadmin ino_orgwork ino_pack     ///
        ino_procnf  ino_procnm   ino_prodnf  ino_prodnm)  ///
       (design_inno mgmt_inno    org_inno    package_inno ///
        procf_inno  procm_inno   prodf_inno  prodm_inno)

* Strip the soc_ prefix from every soc_coop* variable.
rename soc_coop* coop*

* Employment, hours and payroll.
* Two employment totals are built from different source variables. Each sum
* uses "+", so the result is missing whenever either component is missing.
gen employ2 = emp_m + emp_f
gen employ = tocc + tosc
rename thhano manhours
gen payroll = regemp + rememp
* Label corrected to cite tocc, the variable actually used above (was "toxx").
label var employ "Total employment in firm (tocc, tosc)"
label var employ2 "Total employment in firm (male+female)"
label var manhours "Annual man hours"
label var payroll "Total payroll in firm, millions of pesos"

* Combine the two design-type application counts into one variable and drop
* the components. The sum is missing if either component is missing.
gen napp_design = napp_deseno + napp_dibujo
drop napp_deseno napp_dibujo
label var napp_design "N of design apps this year"

*** Ownership: fold the rogue code 5 into category 1, then attach value labels
recode ownership (5 = 1)
label define olbl 1 "domestic" 2 "foreign" 3 "mixed" 4 "public"
label values ownership olbl

*** Legal organisation: map a few two-digit codes onto the 1-8 scheme, then label
recode legalorg (10 = 1) (20 = 2) (41 = 4) (42 = 5)
label define tlbl 1 "individual" 2 "srl" 3 "collective" 4 "sac" ///
                  5 "saa" 6 "cooperative" 7 "public" 8 "other"
label values legalorg tlbl

*** Industry: leading digits of isic3 (isic3/100, truncated toward zero)
generate sic2d = int(isic3/100)
label variable sic2d "2-digit ISIC code"

* Value labels for codes 15-37, defined in a single statement.
label define ilbl                                                         ///
    15 "food products and beverages"                                      ///
    16 "tobacco products"                                                 ///
    17 "textiles"                                                         ///
    18 "wearing apparel; dressing and dyeing of fur"                      ///
    19 "leather preparation & goods"                                      ///
    20 "wood, cork and straw products, ex furniture"                      ///
    21 "paper and paper products"                                         ///
    22 "publishing, printing and reproduction of recorded media"          ///
    23 "coke, refined petroleum products and nuclear fuel"                ///
    24 "chemicals and chemical products"                                  ///
    25 "rubber and plastics products"                                     ///
    26 "other non-metallic mineral products"                              ///
    27 "basic metals"                                                     ///
    28 "fabricated metal products, except machinery and equipment"        ///
    29 "machinery and equipment n.e.c."                                   ///
    30 "office, accounting and computing machinery"                       ///
    31 "electrical machinery and apparatus n.e.c."                        ///
    32 "radio, television and communication equipment and apparatus"      ///
    33 "medical, precision and optical instruments, watches and clocks"   ///
    34 "motor vehicles, trailers and semi-trailers"                       ///
    35 "other transport equipment"                                        ///
    36 "furniture; manufacturing n.e.c."                                  ///
    37 "recycling"
label values sic2d ilbl

*** Regions: value labels for codes 1-16, defined in one statement
label define rlbl                                        ///
     1 "Tarapaca"      2 "Antofagasta"       3 "Atacama"  ///
     4 "Coquimbo"      5 "Valparaiso"        6 "Ohiggins" ///
     7 "Maule"         8 "Bio-Bio"           9 "La Araucania" ///
    10 "Los Lagos"    11 "Aisen"            12 "Mag. y Antartica" ///
    13 "Santiago"     14 "Los Rios"         15 "Arica Parinacota" ///
    16 "Multi"
label values region rlbl

* Variable labels for two variables that keep their source names.
label variable rd_all "Total R&D spending within the co"
label variable costot "Purchased goods and services"

* Restrict the file to the variables listed below; all others are dropped.
keep id year legalorg ownership region* isic2 isic3 sic2d *hours           ///
     nestab est_size emp* employ2 manhours payroll                         ///
     fa* *acct capstk cap_error cap_exp cap_sale tdeprec                   ///
     materials purch_mat mat_fuel costot elecomp                           ///
     totrev* valadd exports                                                ///
     innosmpl napp* n_ip* npat* n_knowhow ipsales                          ///
     rd inno_tot rd_all rd_acq rd_exptot coop* *_inno                      ///
     ef_envir ef_proc ef_prod ef_qual ef_qualcont                          ///
     bar_expers bar_fin bar_info bar_lag bar_risk                          ///
     soc_ext_client soc_ext_conf soc_ext_consul soc_ext_coop soc_ext_pinst ///
     soc_ext_privinst soc_ext_rev soc_ext_uni soc_int_rd soc_int_rdgp      ///
     soc_*_a soc_*_b

* One-way frequency tables for the recoded categorical variables.
foreach v of varlist ownership legalorg region sic2d {
    tabulate `v'
}

* Summary statistics for everything retained.
summarize

* Run the cleaning do-file, write the extract, and close the log.
do $DIR\cleanfile
save $DIR\enia_inno_ip_ex, replace

log close
